{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Problem: Write a custom Dataset and Dataloader to load from a CSV file\n",
    "\n",
    "### Problem Statement\n",
    "You are tasked with creating a **custom Dataset** and **Dataloader** in PyTorch to load data from a given `data.csv` file. The loaded data will be used to run a pre-implemented linear regression model.\n",
    "\n",
    "### Requirements\n",
    "1. **Dataset Class**:\n",
    "   - Implement a class `CustomDataset` that:\n",
    "     - Reads data from a provided `data.csv` file.\n",
    "     - Stores the features (X) and target values (Y) separately.\n",
    "     - Implements PyTorch's `__len__` and `__getitem__` methods for indexing.\n",
    "\n",
    "2. **Dataloader**:\n",
    "   - Use PyTorch's `DataLoader` to create an iterable for batch loading the dataset.\n",
    "   - Support user-defined batch sizes and shuffling of the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import pandas as pd\n",
    "\n",
    "torch.manual_seed(42)\n",
    "X = torch.rand(100, 1) * 10  # 100 data points between 0 and 10\n",
    "y = 2 * X + 3 + torch.randn(100, 1)  # Linear relationship with noise\n",
    "\n",
    "# Save the generated data to data.csv\n",
    "data = torch.cat((X, y), dim=1)\n",
    "df = pd.DataFrame(data.numpy(), columns=['X', 'y'])\n",
    "df.to_csv('data.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "import pandas as pd\n",
    "\n",
    "# TODO: Add the missing code\n",
    "class LinearRegressionDataset(Dataset):\n",
    "    ...\n",
    "\n",
    "# Example usage of the DataLoader\n",
    "dataset = LinearRegressionDataset('data.csv')\n",
    "# TODO: Add the missing code\n",
    "dataloader = ...\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [100/1000], Loss: 1.1354\n",
      "Epoch [200/1000], Loss: 0.4609\n",
      "Epoch [300/1000], Loss: 1.4676\n",
      "Epoch [400/1000], Loss: 0.3768\n",
      "Epoch [500/1000], Loss: 1.1760\n",
      "Epoch [600/1000], Loss: 0.9092\n",
      "Epoch [700/1000], Loss: 0.3547\n",
      "Epoch [800/1000], Loss: 0.6438\n",
      "Epoch [900/1000], Loss: 0.3861\n",
      "Epoch [1000/1000], Loss: 0.8075\n"
     ]
    }
   ],
   "source": [
    "# Define the Linear Regression Model\n",
    "class LinearRegressionModel(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(LinearRegressionModel, self).__init__()\n",
    "        self.linear = nn.Linear(1, 1)  # Single input and single output\n",
    "\n",
    "    def forward(self, x):\n",
    "        return self.linear(x)\n",
    "\n",
    "# Initialize the model, loss function, and optimizer\n",
    "model = LinearRegressionModel()\n",
    "criterion = nn.MSELoss()\n",
    "optimizer = optim.SGD(model.parameters(), lr=0.01)\n",
    "\n",
    "# Training loop\n",
    "epochs = 1000\n",
    "for epoch in range(epochs):\n",
    "    for batch_X, batch_y in dataloader:\n",
    "        # Forward pass\n",
    "        predictions = model(batch_X)\n",
    "        loss = criterion(predictions, batch_y)\n",
    "\n",
    "        # Backward pass and optimization\n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "    # Log progress every 100 epochs\n",
    "    if (epoch + 1) % 100 == 0:\n",
    "        print(f\"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Learned weight: 1.9099, Learned bias: 3.1883\n",
      "Predictions for [[4.0], [7.0]]: [[10.827767372131348], [16.55739974975586]]\n"
     ]
    }
   ],
   "source": [
    "# Display the learned parameters\n",
    "[w, b] = model.linear.parameters()\n",
    "print(f\"Learned weight: {w.item():.4f}, Learned bias: {b.item():.4f}\")\n",
    "\n",
    "# Testing on new data\n",
    "X_test = torch.tensor([[4.0], [7.0]])\n",
    "with torch.no_grad():\n",
    "    predictions = model(X_test)\n",
    "    print(f\"Predictions for {X_test.tolist()}: {predictions.tolist()}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
